test markers for CRE so that subtypes could be manipulated like specific activation or blockage
try entropy score H, also calculate Q|t for each subtype
source("/Shared_win/projects/RNA_normal/analysis.10x.r")
GEX.seur <- readRDS("../integration_Nb5d/sn10x_WYS.sct_anno.s.rds")
GEX.seur
## An object of class Seurat
## 47356 features across 19418 samples within 3 assays
## Active assay: SCT (20434 features, 0 variable features)
## 2 other assays present: RNA, integrated
## 3 dimensional reductions calculated: pca, tsne, umap
ref.seur <- readRDS("../../20230704_10x_SZJ/analysis_ref/GSE149524.P21.integration_Anno.s.rds")
ref.seur
## An object of class Seurat
## 37583 features across 4419 samples within 3 assays
## Active assay: SCT (16365 features, 0 variable features)
## 2 other assays present: RNA, integrated
## 3 dimensional reductions calculated: pca, tsne, umap
# define intAnno1/2 colors
color.A1 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
"#BF7E6B","#D46B35","#F19258","#FF8080",
"#BDAE8D","#BD66C4","#C03778",
"#97BA59","#DFAB16","#2BA956","#9FE727")
color.A2 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
"#BF7E6B","#D46B35","#F19258","#FF8080",
"#BDAE8D","#BD66C4","#C03778",
"#97BA59","#C4D116", "#DFAB16","#EDE25A", "#2BA956","#9FE727")
# define batch/condition colors
color.cnt <- scales::hue_pal()(4)[c(2,1,4,3)]
color.test1 <- color.cnt[1:2]
color.test2 <- color.cnt[3:4]
## define feature colors
# Cell2020
material.heat = function(n){
colorRampPalette(
c(
"#283593", # indigo 800
"#3F51B5", # indigo
"#2196F3", # blue
"#00BCD4", # cyan
"#4CAF50", # green
"#8BC34A", # light green
"#CDDC39", # lime
"#FFEB3B", # yellow
"#FFC107", # amber
"#FF9800", # orange
"#FF5722" # deep orange
)
)(n)
}
# Immunity2019, na gray
colors.Immunity <-c("#191970","#121285","#0C0C9A","#0707B0","#0101C5","#0014CF","#0033D3","#0053D8","#0072DD","#0092E1","#00B2E6",
"#00D1EB","#23E8CD","#7AF17B","#D2FA29","#FFEB00","#FFC300","#FF9B00","#FF8400","#FF7800","#FF6B00","#FF5F00","#FF5300",
"#FF4700","#F73B00","#EF2E00","#E62300","#DD1700","#D50B00","#CD0000")
# NatNeur2021, sc-neurons
color.ref <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
"#C03778","#97BA59","#DFAB16","#BF7E6B",
"#D46B35","#BDAE8D","#BD66C4","#2BA956",
"#FF8080","#FF8080","#FF8080","#FF0000")
cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.05,
cols = color.A1),
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.cnt) ,
rel_widths = c(4.8,5),
ncol = 2)
cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.05,
cols = color.A2),
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.cnt) ,
rel_widths = c(4.85,5),
ncol = 2)
test1.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.PBS","Nb5d.INF"))
test1.seur
## An object of class Seurat
## 47356 features across 8232 samples within 3 assays
## Active assay: SCT (20434 features, 0 variable features)
## 2 other assays present: RNA, integrated
## 3 dimensional reductions calculated: pca, tsne, umap
cowplot::plot_grid(
DimPlot(test1.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.15,
cols = color.A1),
DimPlot(test1.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.test1) ,
rel_widths = c(4.8,5),
ncol = 2)
cowplot::plot_grid(
DimPlot(test1.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.15,
cols = color.A2),
DimPlot(test1.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.test1) ,
rel_widths = c(4.85,5),
ncol = 2)
PBS.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.PBS"))
PBS.seur
## An object of class Seurat
## 47356 features across 4789 samples within 3 assays
## Active assay: SCT (20434 features, 0 variable features)
## 2 other assays present: RNA, integrated
## 3 dimensional reductions calculated: pca, tsne, umap
cowplot::plot_grid(
DimPlot(PBS.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.15,
cols = color.A1),
DimPlot(PBS.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.test1) ,
rel_widths = c(4.8,5),
ncol = 2)
cowplot::plot_grid(
DimPlot(PBS.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.15,
cols = color.A2),
DimPlot(PBS.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
cols = color.test1) ,
rel_widths = c(4.85,5),
ncol = 2)
#test2.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.CTL","Nb5d.CKO"))
#test2.seur
markers.new <- read.csv("../integration_Nb5d/Baf53cre_Nb.markers.SCT_intAnno1.202402.csv")
markers.new$cluster <- factor(as.character(markers.new$cluster),
levels = levels(GEX.seur$intAnno1))
head(markers.new)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## 1 0 1.824018 0.953 0.335 0 EMN1 Ptprt
## 2 0 1.668551 0.981 0.495 0 EMN1 Tshz2
## 3 0 1.585436 0.938 0.319 0 EMN1 Bnc2
## 4 0 1.384775 0.909 0.397 0 EMN1 Grik1
## 5 0 1.301007 1.000 0.793 0 EMN1 Rbfox1
## 6 0 1.107801 0.992 0.864 0 EMN1 Negr1
cut.pct = 0.1
cut.padj = 0.001
markers.n <- (markers.new %>% filter(pct.1 > cut.pct & p_val_adj < cut.padj))$gene
markers.n <- unique(markers.n)
#head(markers.n)
length(markers.n)
## [1] 3109
DIG <- grep("^mt-|^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^Jun|^Fos|^AY|^AA|^AC|^AW|^BC|^Gm|^Hist|Lars2|Rik$|-ps",
markers.n,value = T)
markers.n <- setdiff(markers.n, DIG)
length(markers.n)
## [1] 2679
mat.n <- AverageExpression(GEX.seur,
assays = "SCT",
features = markers.n,
return.seurat = F,
group.by = "intAnno1",
layer="data")
## The following functions and any applicable methods accept the dots: CreateSeuratObject
# AverageExpression() would recover the log2-data, here add log2 back
mat.n <- log2(mat.n$SCT+1)
mat.n <- as.data.frame(mat.n)
dim(mat.n)
## [1] 2679 16
mat.n[1:5,]
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## Ptprt 2.655444 1.922185 0.6064187 0.1626202 0.2422009 0.1698824 0.1398116
## Tshz2 2.921009 2.455140 0.7958795 0.2954559 0.6207125 1.4444638 0.4100883
## Bnc2 2.440412 2.029988 1.1821320 1.5221357 2.1438358 0.1392112 0.1139562
## Grik1 2.632152 2.932916 1.7605224 1.3623424 0.6407503 0.4778998 0.5302975
## Rbfox1 4.464304 4.436291 3.4913102 2.5482965 1.2826152 1.0258514 1.3658601
## IMN3 IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## Ptprt 0.12504700 0.07246964 1.8134133 1.2097636 1.518071 0.9160788 0.8096295
## Tshz2 0.32598631 0.56145575 0.4183887 0.3957756 2.671051 0.7400137 0.8958165
## Bnc2 0.06803752 0.24100810 0.1766197 0.1069152 1.764711 0.1519077 0.8054275
## Grik1 0.38719387 0.41503750 0.3286227 0.1796715 0.971711 0.2279666 0.2410081
## Rbfox1 2.21907111 2.54357087 1.3383857 2.8662705 4.931885 4.4772219 2.3510499
## IPAN3 IPAN4
## Ptprt 0.2529807 0.2324798
## Tshz2 0.6322682 0.7312857
## Bnc2 0.2326608 0.3030989
## Grik1 0.3504972 0.6286980
## Rbfox1 2.7970130 3.3886686
## shanno.entropy
# ref. https://davetang.org/muse/2013/08/28/tissue-specificity/
#
# Q version for specific tissue
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1088961/
# 'Measuring tissue specificity with entropy: Qg|t'
shannon.entropy <- function(p,returnH=TRUE,returnQ=FALSE){
if (min(p) < 0 || sum(p) <= 0 || is.na(p[1])) return(NA)
else {p.norm <- p[p>0]/sum(p)
if(returnH==TRUE){
return(-sum(log2(p.norm)*p.norm))
}else if(returnQ==TRUE){
return(-sum(log2(p.norm)*p.norm) - log2(p/sum(p)))
}else{
return(NA)
}
}
}
ShE <- rep(NA,nrow(mat.n))
for(i in 1:nrow(mat.n)){
ShE[i] <- shannon.entropy(as.vector(as.matrix(mat.n[i,])))
}
mat.n$ShE.H <- ShE
dim(mat.n)
## [1] 2679 17
mat.n[1:5,]
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## Ptprt 2.655444 1.922185 0.6064187 0.1626202 0.2422009 0.1698824 0.1398116
## Tshz2 2.921009 2.455140 0.7958795 0.2954559 0.6207125 1.4444638 0.4100883
## Bnc2 2.440412 2.029988 1.1821320 1.5221357 2.1438358 0.1392112 0.1139562
## Grik1 2.632152 2.932916 1.7605224 1.3623424 0.6407503 0.4778998 0.5302975
## Rbfox1 4.464304 4.436291 3.4913102 2.5482965 1.2826152 1.0258514 1.3658601
## IMN3 IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## Ptprt 0.12504700 0.07246964 1.8134133 1.2097636 1.518071 0.9160788 0.8096295
## Tshz2 0.32598631 0.56145575 0.4183887 0.3957756 2.671051 0.7400137 0.8958165
## Bnc2 0.06803752 0.24100810 0.1766197 0.1069152 1.764711 0.1519077 0.8054275
## Grik1 0.38719387 0.41503750 0.3286227 0.1796715 0.971711 0.2279666 0.2410081
## Rbfox1 2.21907111 2.54357087 1.3383857 2.8662705 4.931885 4.4772219 2.3510499
## IPAN3 IPAN4 ShE.H
## Ptprt 0.2529807 0.2324798 3.359455
## Tshz2 0.6322682 0.7312857 3.583557
## Bnc2 0.2326608 0.3030989 3.280647
## Grik1 0.3504972 0.6286980 3.468434
## Rbfox1 2.7970130 3.3886686 3.862025
#write.csv(mat.n, "test_CREmarkers/matrix.avgExp_ShE.H.csv")
plot(sort(mat.n$ShE.H),
main= "",
ylab = "Entropy Score",
xlab = "Sorted Filtered Markers")
abline(h=1)
abline(h=1.5)
abline(h=2)
abline(h=2.5)
abline(h=3)
abline(h=3.5)
#
ShE.Q <- matrix(data = NA,
nrow = length(markers.n),
ncol = length(levels(markers.new$cluster)))
rownames(ShE.Q) <- markers.n
colnames(ShE.Q) <- levels(markers.new$cluster)
ShE.Q[1:5,1:5]
## EMN1 EMN2 EMN3 EMN4 EMN5
## Ptprt NA NA NA NA NA
## Tshz2 NA NA NA NA NA
## Bnc2 NA NA NA NA NA
## Grik1 NA NA NA NA NA
## Rbfox1 NA NA NA NA NA
for(ii in 1:nrow(ShE.Q)){
ShE.Q[ii,] <- shannon.entropy(as.vector(as.matrix(mat.n[ii,1:16])),
returnH = FALSE, returnQ = TRUE)
}
ShE.Q <- as.data.frame(ShE.Q)
for(nn in 1:ncol(ShE.Q)){
ShE.Q[,nn][is.infinite(ShE.Q[,nn])] <- 12
}
sum(is.na(ShE.Q))
## [1] 0
sum(is.infinite(ShE.Q$EMN3))
## [1] 0
ShE.Q[1:5,1:5]
## EMN1 EMN2 EMN3 EMN4 EMN5
## Ptprt 5.634029 6.100235 7.764596 9.663404 9.088706
## Tshz2 6.065198 6.315860 7.941043 9.370651 8.299668
## Bnc2 5.740055 6.005709 6.785789 6.421083 5.926986
## Grik1 5.886464 5.730371 6.466703 6.836617 7.924873
## Rbfox1 7.212261 7.221343 7.566928 8.021163 9.011608
ShE.Q %>% arrange(EMN1) %>% head(8)
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## Dsc3 3.764107 4.915328 8.749315 8.880889 7.554829 8.159380 8.420940
## Dock2 4.061433 4.298919 8.236083 8.946709 5.261573 9.774764 10.477792
## Caln1 4.155643 5.198258 8.979109 9.734998 7.155134 8.717892 8.528550
## Tmem163 4.203885 3.786503 7.747804 7.263834 7.865115 8.721184 9.312186
## Pi15 4.390661 5.298218 8.973564 8.692491 7.781209 8.677110 9.090279
## Tmem132cos 4.395585 6.244501 9.549886 9.268456 8.187378 9.596800 9.483146
## Insyn2b 4.521230 4.610104 8.926992 8.814650 5.447574 9.643589 10.347058
## Tmem132c 4.591526 6.362202 9.608157 8.446722 8.762290 9.528402 9.583124
## IMN3 IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## Dsc3 8.963182 8.055549 8.755171 7.573888 6.714365 8.114485 7.755140
## Dock2 11.757571 8.593396 8.972257 8.372613 7.197920 9.200830 7.933046
## Caln1 8.916975 9.061784 8.808725 8.841002 4.334564 8.527540 9.309343
## Tmem163 8.859407 8.272979 9.233480 10.040841 7.600900 8.659436 8.781755
## Pi15 8.194369 8.919823 7.809301 6.299029 7.667099 8.795553 8.236121
## Tmem132cos 10.760865 9.588758 8.562376 8.565222 4.727123 9.125146 6.488263
## Insyn2b 9.723543 9.134953 8.689117 9.326240 7.061131 8.917851 8.101906
## Tmem132c 9.174982 8.877341 8.322807 9.067038 5.300405 8.633030 6.702962
## IPAN3 IPAN4
## Dsc3 12.000000 8.003080
## Dock2 12.000000 7.569130
## Caln1 10.180289 8.576720
## Tmem163 7.093112 8.893623
## Pi15 9.457682 7.748585
## Tmem132cos 6.754561 8.110178
## Insyn2b 8.265891 7.452693
## Tmem132c 6.577545 8.358975
ShE.Q %>% arrange(IPAN1) %>% head(8)
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## Btg4 8.492209 12.000000 12.000000 12.000000 12.000000 8.021425 7.142348
## Itgb6 7.933657 8.291203 7.294544 9.327260 9.406877 8.354383 8.282223
## Dapk2 7.928384 8.544685 8.321872 8.845453 7.927003 8.263939 8.383973
## Layn 7.610403 6.814437 8.226534 12.000000 7.609021 7.246876 8.066515
## Kit 7.380907 7.098536 6.225324 12.000000 7.478922 7.116777 6.616599
## Otof 7.624147 8.147214 7.149723 6.548392 7.943731 7.582136 8.079476
## Smad6 8.796843 9.998357 8.998357 8.715843 12.000000 8.673941 8.668697
## Oc90 8.632864 6.668358 8.834504 7.552831 12.000000 7.969786 9.089238
## IMN3 IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## Btg4 7.102310 6.512263 12.000000 5.705997 12.000000 1.068297 5.763363
## Itgb6 7.508182 9.234286 8.617323 12.000000 12.000000 1.224553 5.243536
## Dapk2 8.343975 7.754658 6.726031 6.629982 6.084370 1.581388 7.781798
## Layn 8.026477 7.436431 6.819467 12.000000 5.761401 1.740145 6.949955
## Kit 6.897826 12.000000 12.000000 12.000000 12.000000 1.954451 4.979762
## Otof 7.363591 7.188595 8.152244 7.962941 12.000000 2.007532 7.606490
## Smad6 9.212917 12.000000 9.588638 8.814697 4.653266 2.119263 6.296300
## Oc90 7.051353 7.459858 12.000000 7.651685 12.000000 2.185270 6.127824
## IPAN3 IPAN4
## Btg4 12.000000 12.000000
## Itgb6 7.778662 7.542432
## Dapk2 12.000000 8.378327
## Layn 12.000000 12.000000
## Kit 12.000000 12.000000
## Otof 7.313583 8.395054
## Smad6 5.594141 8.248716
## Oc90 6.004234 7.085704
ShE.Q %>% arrange(EMN5) %>% head(8)
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## Nfatc1 9.786706 9.766938 7.036251 4.006556 1.637709 8.400180 7.441072
## Egfros 7.023554 6.591335 9.581713 3.945756 2.161482 11.300388 8.837435
## Egfr 7.128813 5.996650 9.037511 4.288033 2.317219 10.170273 9.706171
## Fut9 11.254590 9.017041 8.022038 5.325862 2.348509 10.784072 12.073257
## Ntn1 8.567217 8.018644 8.716150 5.462094 2.671150 7.000452 10.968364
## Kl 10.046038 8.442420 7.122169 4.482818 2.858706 7.991823 8.696177
## Lmo7 9.712149 9.692376 10.691113 7.411904 3.048897 9.019270 9.946268
## Kcnip1 8.446853 7.158625 6.896833 3.589648 3.171838 10.604803 8.727645
## IMN3 IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## Nfatc1 9.566040 6.982510 9.941761 9.167820 12.000000 8.851137 8.810835
## Egfros 7.799586 6.478359 8.758493 12.000000 12.000000 9.987253 10.624615
## Egfr 6.941407 6.675475 12.000000 8.271019 12.000000 8.689391 10.493963
## Fut9 12.000000 7.291223 9.412990 9.637611 12.000000 11.055035 9.281160
## Ntn1 6.770621 7.025734 8.308096 8.532718 12.000000 7.954415 7.593738
## Kl 8.656139 12.000000 9.031860 6.676766 12.000000 9.261736 12.000000
## Lmo7 12.000000 12.000000 4.981694 7.510622 6.641859 6.611811 5.769740
## Kcnip1 8.011760 12.000000 8.386489 9.609204 6.749267 8.808044 9.252219
## IPAN3 IPAN4
## Nfatc1 12.000000 12.000000
## Egfros 12.000000 9.416041
## Egfr 12.000000 12.000000
## Fut9 4.914329 5.385352
## Ntn1 7.883360 7.647129
## Kl 5.616417 8.690525
## Lmo7 12.000000 7.359184
## Kcnip1 12.000000 10.041810
VlnPlot(test1.seur, features = c("Dsc3","Dock2","Caln1","Tmem163"), group.by = "intAnno1", assay = "RNA", ncol = 2, cols = color.A1)
VlnPlot(test1.seur, features = c("Btg4","Itgb6","Dapk2","Layn"), group.by = "intAnno1", assay = "RNA", ncol = 2, cols = color.A1)
par(mfrow=c(4,4))
lapply(colnames(ShE.Q),function(x){
plot(sort(ShE.Q[,x]),
main= x,
ylab = "Entropy Score - Q|t",
xlab = "Sorted Filtered Markers")
abline(h=4)
abline(h=5)
abline(h=6)
abline(h=7)
abline(h=8)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL
##
## [[16]]
## NULL
markers.filt1 <- markers.new %>% filter(gene %in% markers.n)
# add avg.expression
markers.filt1$avg.exp <- "NA"
for(ii in 1:nrow(markers.filt1)){
markers.filt1$avg.exp[ii] <- round(mat.n[markers.filt1$gene[ii],markers.filt1$cluster[ii]],6)
}
# add ShE.H
markers.filt1$ShE.H <- "NA"
for(ii in 1:nrow(markers.filt1)){
markers.filt1$ShE.H[ii] <- round(mat.n[markers.filt1$gene[ii],"ShE.H"],6)
}
# add ShE.Q
markers.filt1$ShE.Q <- "NA"
for(ii in 1:nrow(markers.filt1)){
markers.filt1$ShE.Q[ii] <- round(ShE.Q[markers.filt1$gene[ii],markers.filt1$cluster[ii]],6)
}
markers.filt1 <- markers.filt1 %>% arrange(cluster, ShE.Q)
dim(markers.new)
## [1] 10134 7
dim(markers.filt1)
## [1] 8098 10
head(markers.new)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## 1 0 1.824018 0.953 0.335 0 EMN1 Ptprt
## 2 0 1.668551 0.981 0.495 0 EMN1 Tshz2
## 3 0 1.585436 0.938 0.319 0 EMN1 Bnc2
## 4 0 1.384775 0.909 0.397 0 EMN1 Grik1
## 5 0 1.301007 1.000 0.793 0 EMN1 Rbfox1
## 6 0 1.107801 0.992 0.864 0 EMN1 Negr1
head(markers.filt1)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## 1 0.000000e+00 0.3833830 0.256 0.027 0.000000e+00 EMN1 Dsc3
## 2 0.000000e+00 0.5050384 0.338 0.055 0.000000e+00 EMN1 Dock2
## 3 0.000000e+00 0.6942665 0.431 0.049 0.000000e+00 EMN1 Caln1
## 4 3.401898e-257 0.3335389 0.237 0.057 6.951438e-253 EMN1 Tmem163
## 5 0.000000e+00 0.3519255 0.251 0.032 0.000000e+00 EMN1 Pi15
## 6 0.000000e+00 0.4348302 0.278 0.026 0.000000e+00 EMN1 Tmem132cos
## avg.exp ShE.H ShE.Q
## 1 0.424829 2.662889 3.764107
## 2 0.601381 2.558888 4.061433
## 3 0.779451 2.605756 4.155643
## 4 0.436328 2.527474 4.203885
## 5 0.401343 2.994079 4.390661
## 6 0.477657 2.831334 4.395585
#write.csv(ShE.Q, "test_CREmarkers/matrix.ShE.Q.csv")
#write.csv(markers.filt1, "test_CREmarkers/markers.filt2679.Qsorted.csv")
table(markers.new$cluster)
##
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2 IMN3 IMN4 IN1 IN2 IN3 IPAN1
## 356 663 329 610 777 377 530 943 884 556 801 372 1148
## IPAN2 IPAN3 IPAN4
## 622 501 665
table(markers.filt1$cluster)
##
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2 IMN3 IMN4 IN1 IN2 IN3 IPAN1
## 321 589 285 465 568 323 449 733 685 462 582 278 950
## IPAN2 IPAN3 IPAN4
## 505 378 525
markers.filt1 %>% filter(ShE.Q < 6) %>% group_by(cluster) %>% summarise(n=n())
## # A tibble: 16 x 2
## cluster n
## <fct> <int>
## 1 EMN1 48
## 2 EMN2 42
## 3 EMN3 30
## 4 EMN4 46
## 5 EMN5 79
## 6 IMN1 38
## 7 IMN2 39
## 8 IMN3 75
## 9 IMN4 91
## 10 IN1 88
## 11 IN2 84
## 12 IN3 69
## 13 IPAN1 176
## 14 IPAN2 96
## 15 IPAN3 87
## 16 IPAN4 91
markers.filt1 %>% filter(ShE.H < 3) %>% group_by(cluster) %>% summarise(n=n())
## # A tibble: 16 x 2
## cluster n
## <fct> <int>
## 1 EMN1 17
## 2 EMN2 13
## 3 EMN3 10
## 4 EMN4 23
## 5 EMN5 37
## 6 IMN1 13
## 7 IMN2 16
## 8 IMN3 23
## 9 IMN4 40
## 10 IN1 43
## 11 IN2 43
## 12 IN3 33
## 13 IPAN1 76
## 14 IPAN2 54
## 15 IPAN3 48
## 16 IPAN4 52
markers.filt1 %>% filter(ShE.H < 3 & ShE.Q < 6) %>% group_by(cluster) %>% summarise(n=n())
## # A tibble: 16 x 2
## cluster n
## <fct> <int>
## 1 EMN1 16
## 2 EMN2 12
## 3 EMN3 9
## 4 EMN4 22
## 5 EMN5 34
## 6 IMN1 12
## 7 IMN2 13
## 8 IMN3 20
## 9 IMN4 34
## 10 IN1 41
## 11 IN2 40
## 12 IN3 31
## 13 IPAN1 72
## 14 IPAN2 53
## 15 IPAN3 46
## 16 IPAN4 49
#markers.filt2 <- markers.filt1 %>% filter(ShE.H < 3) %>% group_by(cluster) %>% arrange(cluster,ShE.Q)
#markers.filt2
markers.filt2 <- markers.filt1 %>% group_by(cluster) %>% arrange(cluster,ShE.Q) %>% do(head(.,n=60))
markers.filt2 %>% head(8)
## # A tibble: 8 x 10
## # Groups: cluster [1]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene avg.exp ShE.H ShE.Q
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr> <chr> <chr>
## 1 0 0.383 0.256 0.027 0 EMN1 Dsc3 0.4248~ 2.66~ 3.76~
## 2 0 0.505 0.338 0.055 0 EMN1 Dock2 0.6013~ 2.55~ 4.06~
## 3 0 0.694 0.431 0.049 0 EMN1 Caln1 0.7794~ 2.60~ 4.15~
## 4 3.40e-257 0.334 0.237 0.057 6.95e-253 EMN1 Tmem163 0.4363~ 2.52~ 4.20~
## 5 0 0.352 0.251 0.032 0 EMN1 Pi15 0.4013~ 2.99~ 4.39~
## 6 0 0.435 0.278 0.026 0 EMN1 Tmem13~ 0.4776~ 2.83~ 4.39~
## 7 2.12e-276 0.263 0.204 0.036 4.33e-272 EMN1 Insyn2b 0.3196~ 2.80~ 4.52~
## 8 0 0.683 0.431 0.046 0 EMN1 Tmem13~ 0.7571~ 3.02~ 4.59~
pp.filt2 <- lapply(levels(GEX.seur$intAnno1), function(xx){
DotPlot(GEX.seur, features = (markers.filt2 %>% filter(cluster==xx))$gene, group.by = "intAnno1",
cols = c("midnightblue","darkorange1")) + labs(title=paste0(xx," - candidate CRE genes(entropy Q|t top40)") )+
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)
})
#pp.filt2
markers.filt3 <- markers.filt1 %>% filter(pct.1>0.2) %>% group_by(cluster) %>% arrange(cluster,ShE.Q) %>% do(head(.,n=60))
markers.filt3 %>% head(8)
## # A tibble: 8 x 10
## # Groups: cluster [1]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene avg.exp ShE.H ShE.Q
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr> <chr> <chr>
## 1 0 0.383 0.256 0.027 0 EMN1 Dsc3 0.4248~ 2.66~ 3.76~
## 2 0 0.505 0.338 0.055 0 EMN1 Dock2 0.6013~ 2.55~ 4.06~
## 3 0 0.694 0.431 0.049 0 EMN1 Caln1 0.7794~ 2.60~ 4.15~
## 4 3.40e-257 0.334 0.237 0.057 6.95e-253 EMN1 Tmem163 0.4363~ 2.52~ 4.20~
## 5 0 0.352 0.251 0.032 0 EMN1 Pi15 0.4013~ 2.99~ 4.39~
## 6 0 0.435 0.278 0.026 0 EMN1 Tmem13~ 0.4776~ 2.83~ 4.39~
## 7 2.12e-276 0.263 0.204 0.036 4.33e-272 EMN1 Insyn2b 0.3196~ 2.80~ 4.52~
## 8 0 0.683 0.431 0.046 0 EMN1 Tmem13~ 0.7571~ 3.02~ 4.59~
#write.csv(markers.filt3, "test_CREmarkers/markers.pct0.2_Qtop60.csv")
pp.filt3 <- lapply(levels(GEX.seur$intAnno1), function(xx){
DotPlot(GEX.seur, features = (markers.filt3 %>% filter(cluster==xx))$gene, group.by = "intAnno1",
cols = c("midnightblue","darkorange1")) + labs(title=paste0(xx," - candidate CRE genes(pct>20%, entropy Q|t top60)") )+
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)+
scale_color_gradientn(colours = material.heat(100))
})
pp.filt3
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
ppref.filt3 <- lapply(levels(GEX.seur$intAnno1), function(xx){
DotPlot(ref.seur, features = (markers.filt3 %>% filter(cluster==xx))$gene, group.by = "Anno2",
cols = c("midnightblue","darkorange1")) + labs(title=paste0(xx," - candidate CRE genes(check in NatNeur2021)") )+
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)+
scale_color_gradientn(colours = material.heat(100))
})
ppref.filt3
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
xx=16
cowplot::plot_grid(
pp.filt3[[xx]],
ppref.filt3[[xx]],
ncol = 1
)